knitr::opts_knit$set(self_contained = FALSE)
knitr::opts_knit$set(browser = TRUE)
a. Install and load the following packages:
tidyversestringrplotlycrosstalkhtmltoolsDT# Clear environment
rm(list = ls())
# Load packages
packages <- c(
"tidyverse", "stringr", "plotly", "crosstalk", "htmltools", "DT"
)
for (pkg in packages) {
if (!require(pkg, character.only = TRUE)) {
install.packages(pkg, repos = "https://cloud.r-project.org/", dependencies = TRUE)
library(pkg, character.only = TRUE)
}
}
b. Import the ACS dataset into your R Markdown or R Notebook.
raw_df <- read_csv("../../Data/acs_data.csv")
c. Clean the dataset using the
clean_acs_metadata() function provided below.
d. Explore the data by answering the following questions:
# Dimensions
nrow(acs_wide); ncol(acs_wide)
## [1] 52
## [1] 87
# Columms
names(acs_wide)
## [1] "geoid"
## [2] "state"
## [3] "sex_by_age_total"
## [4] "sex_by_age_total_male"
## [5] "sex_by_age_total_male_under_5_years"
## [6] "sex_by_age_total_male_5_to_9_years"
## [7] "sex_by_age_total_male_10_to_14_years"
## [8] "sex_by_age_total_male_15_to_17_years"
## [9] "sex_by_age_total_male_18_and_19_years"
## [10] "sex_by_age_total_male_20_years"
## [11] "sex_by_age_total_male_21_years"
## [12] "sex_by_age_total_male_22_to_24_years"
## [13] "sex_by_age_total_male_25_to_29_years"
## [14] "sex_by_age_total_male_30_to_34_years"
## [15] "sex_by_age_total_male_35_to_39_years"
## [16] "sex_by_age_total_male_40_to_44_years"
## [17] "sex_by_age_total_male_45_to_49_years"
## [18] "sex_by_age_total_male_50_to_54_years"
## [19] "sex_by_age_total_male_55_to_59_years"
## [20] "sex_by_age_total_male_60_and_61_years"
## [21] "sex_by_age_total_male_62_to_64_years"
## [22] "sex_by_age_total_male_65_and_66_years"
## [23] "sex_by_age_total_male_67_to_69_years"
## [24] "sex_by_age_total_male_70_to_74_years"
## [25] "sex_by_age_total_male_75_to_79_years"
## [26] "sex_by_age_total_male_80_to_84_years"
## [27] "sex_by_age_total_male_85_years_and_over"
## [28] "sex_by_age_total_female"
## [29] "sex_by_age_total_female_under_5_years"
## [30] "sex_by_age_total_female_5_to_9_years"
## [31] "sex_by_age_total_female_10_to_14_years"
## [32] "sex_by_age_total_female_15_to_17_years"
## [33] "sex_by_age_total_female_18_and_19_years"
## [34] "sex_by_age_total_female_20_years"
## [35] "sex_by_age_total_female_21_years"
## [36] "sex_by_age_total_female_22_to_24_years"
## [37] "sex_by_age_total_female_25_to_29_years"
## [38] "sex_by_age_total_female_30_to_34_years"
## [39] "sex_by_age_total_female_35_to_39_years"
## [40] "sex_by_age_total_female_40_to_44_years"
## [41] "sex_by_age_total_female_45_to_49_years"
## [42] "sex_by_age_total_female_50_to_54_years"
## [43] "sex_by_age_total_female_55_to_59_years"
## [44] "sex_by_age_total_female_60_and_61_years"
## [45] "sex_by_age_total_female_62_to_64_years"
## [46] "sex_by_age_total_female_65_and_66_years"
## [47] "sex_by_age_total_female_67_to_69_years"
## [48] "sex_by_age_total_female_70_to_74_years"
## [49] "sex_by_age_total_female_75_to_79_years"
## [50] "sex_by_age_total_female_80_to_84_years"
## [51] "sex_by_age_total_female_85_years_and_over"
## [52] "race_total"
## [53] "race_total_white_alone"
## [54] "race_total_black_or_african_american_alone"
## [55] "race_total_american_indian_and_alaska_native_alone"
## [56] "race_total_asian_alone"
## [57] "race_total_native_hawaiian_and_other_pacific_islander_alone"
## [58] "race_total_some_other_race_alone"
## [59] "race_total_two_or_more_races"
## [60] "hispanic_or_latino_origin_total"
## [61] "hispanic_or_latino_origin_total_not_hispanic_or_latino"
## [62] "hispanic_or_latino_origin_total_hispanic_or_latino"
## [63] "occupancy_status_total"
## [64] "occupancy_status_total_occupied"
## [65] "occupancy_status_total_vacant"
## [66] "tenure_total"
## [67] "tenure_total_owner_occupied"
## [68] "tenure_total_renter_occupied"
## [69] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total"
## [70] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_less_than_100_percent"
## [71] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_100_to_149_percent"
## [72] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_150_to_199_percent"
## [73] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_200_to_249_percent"
## [74] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_250_to_299_percent"
## [75] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_300_to_349_percent"
## [76] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_350_to_399_percent"
## [77] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_400_to_499_percent"
## [78] "gross_rent_as_a_percentage_of_household_income_in_the_past_12_months_total_500_percent_or_more"
## [79] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total"
## [80] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage"
## [81] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_less_than_100_percent"
## [82] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_100_to_149_percent"
## [83] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_150_to_199_percent"
## [84] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_200_to_249_percent"
## [85] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_250_to_299_percent"
## [86] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_300_to_349_percent"
## [87] "mortgage_status_by_selected_monthly_owner_costs_as_a_percentage_of_household_income_in_the_past_12_months_total_housing_units_with_a_mortgage_350_to_399_percent"
# Value checks: not printing here, but this will show summary stats for all variables
# summary(acs_wide)
# Check duplicates on geoid / state
sum(duplicated(acs_wide$geoid))
## [1] 0
sum(duplicated(acs_wide$state))
## [1] 0
# Categorical checks
table(acs_wide$state)
##
## Alabama Alaska Arizona
## 1 1 1
## Arkansas California Colorado
## 1 1 1
## Connecticut Delaware District of Columbia
## 1 1 1
## Florida Georgia Hawaii
## 1 1 1
## Idaho Illinois Indiana
## 1 1 1
## Iowa Kansas Kentucky
## 1 1 1
## Louisiana Maine Maryland
## 1 1 1
## Massachusetts Michigan Minnesota
## 1 1 1
## Mississippi Missouri Montana
## 1 1 1
## Nebraska Nevada New Hampshire
## 1 1 1
## New Jersey New Mexico New York
## 1 1 1
## North Carolina North Dakota Ohio
## 1 1 1
## Oklahoma Oregon Pennsylvania
## 1 1 1
## Puerto Rico Rhode Island South Carolina
## 1 1 1
## South Dakota Tennessee Texas
## 1 1 1
## Utah Vermont Virginia
## 1 1 1
## Washington West Virginia Wisconsin
## 1 1 1
## Wyoming
## 1
a. Create new percentage race variables:
pct_whitepct_blackpct_asianpct_hispb. Create percentage housing tenure variables:
pct_ownerpct_renter# Create pct_white, pct_black, pct_asian, pct_hisp, pct_owner, and pct_renter variables
acs_wide <- acs_wide %>%
mutate(
pct_white = race_total_white_alone / race_total,
pct_black = race_total_black_or_african_american_alone / race_total,
pct_asian = race_total_asian_alone / race_total,
pct_hisp = hispanic_or_latino_origin_total_hispanic_or_latino / hispanic_or_latino_origin_total,
pct_owner = tenure_total_owner_occupied / tenure_total,
pct_renter = tenure_total_renter_occupied / tenure_total
)
c. Create age bucket variables (using the code provided below):
age_children (0–17)age_adults (18–64)age_seniors (65+)# Age groups are extremely granular. Bucket them into children (0 - 17), adult (18 - 64), and seniors (65 +)
acs_wide <- acs_wide %>%
mutate(
age_children = rowSums(
select(., matches("sex_by_age_total_(male|female)_(under_5|5_to_9|10_to_14|15_to_17)_years$")),
na.rm = TRUE
),
age_adults = rowSums(
select(., matches("sex_by_age_total_(male|female)_(18_and_19|20|21|22_to_24|25_to_29|30_to_34|35_to_39|40_to_44|45_to_49|50_to_54|55_to_59|60_and_61|62_to_64)_years$")),
na.rm = TRUE
),
age_seniors = rowSums(
select(., matches("sex_by_age_total_(male|female)_(65_and_66|67_to_69|70_to_74|75_to_79|80_to_84|85_years_and_older)")),
na.rm = TRUE
)
)
acs_wide <- acs_wide %>%
mutate(pct_seniors = age_seniors / sex_by_age_total * 100)
Create a filter_select() that filters the dataset by state.
state_filter <- filter_select(
id = "state_filter",
label = "Select a State:",
sharedData = sd,
group = ~state
)
a. How does the filter_select() know which rows to include/exclude?
It filters based on the keys and current selection stored in the
SharedData object. When a state is chosen, it marks only
the rows whose state value matches the selection as “active.”
b. Why does it only work when the input is a SharedData object?
filter_select() needs SharedData because
that’s where Crosstalk keeps the reactive state, including the keys,
group, and selected rows. A regular data frame has no mechanism for
tracking or broadcasting selections to other widgets.
Using the same sd SharedData object:
a. Create a Plotly bar chart that displays at least two racial groups.
race_plot <- sd %>%
plot_ly(
x = ~state,
y = ~race_total_white_alone,
type = "bar",
name = "White"
) %>%
add_bars(y = ~race_total_black_or_african_american_alone, name = "Black") %>%
layout(
barmode = "stack",
title = "Race Distribution (Absolute Counts)",
xaxis = list(title = ""),
yaxis = list(title = "Population")
)
b. Place the chart next to your state filter using
the bscols() layout function.
bscols(
widths = c(3, 9),
list(
h3("Filters"),
state_filter
),
list(
h3("State Race Composition"),
race_plot
)
)
c. Add a simple HTML summary box that shows a reactive metric (e.g., percent seniors).
See the example below.
This code below creates a static summary box:
summary_box <- htmltools::div(
style = "
background:#eef3f7;
padding:18px;
border-radius:8px;
font-size:18px;
width:95%;
margin-top:10px;
",
htmltools::strong("Percent Senior Population: "),
sd$data() %>%
mutate(pct_seniors = age_seniors / sex_by_age_total) %>%
pull(pct_seniors) %>%
mean(na.rm = TRUE) %>%
scales::percent()
)
bscols(
widths = c(3, 9),
list(
h3("Filters"),
state_filter,
br(),
summary_box
),
list(
h3("State Race Composition"),
race_plot
)
)
How can we make it reactive?
# add new variable above the creation of shareddata
# summarywidget
#devtools::install_github("kent37/summarywidget")
library(summarywidget)
summary_box_reactive <- htmltools::div(
style = "
background:#eef3f7;
padding:18px;
border-radius:8px;
font-size:18px;
width:95%;
margin-top:10px;
text-align: center;
",
htmltools::strong("% Senior Population:"),
htmltools::tags$div(
summarywidget(sd, statistic = 'mean', "pct_seniors", digits = 1),
style="font-size: 2.4em; font-weight: 700; margin-top: 10px;"
)
)
bscols(
widths = c(3, 9),
list(
h3("Filters"),
state_filter,
br(),
summary_box_reactive
),
list(
h3("State Race Composition"),
race_plot
)
)